# Alex Gazmararian
# agazmararian@gmail.com

library(ggtext)
library(scales)
source("analysis/visibility/analysis_config.R")
source(here("R", "proximity_analysis_functions.R"))
source(here("R", "visibility", "replication_mode.R"))
library(sensemakr)

# Initialize replication mode (auto-detects based on available data)
REPLICATION_MODE <- init_replication_mode()
message("=== PROXIMITY ANALYSIS ===")
message("Replication mode: ", REPLICATION_MODE)

# Set default for pnas flag if not already defined (used by pnas_script_runner.R)
if (!exists("pnas")) pnas <- TRUE

g <- readRDS(here("data", "output", "visibility_analysis.rds"))

# Descriptive of what type of project----
# How many people report seeing green projects
visibility.mean <- round(weighted.mean(g$greenproj_bin, g$wt_all_trim) * 100, 0)
print(paste0("Weighted mean of people reporting seeing green projects: ", visibility.mean, "%"))
writeLines(as.character(visibility.mean), paste0("output/pnas/stats/visibility_mean.txt"))

g %>%
  group_by(greenproj) %>%
  summarize(n = sum(wt_all_trim)) %>%
  mutate(prop = n / sum(n))

g$wt_green_type_trim %>% summary()

g %>%
  filter(!is.na(proj_solar) | !is.na(proj_wind) | !is.na(proj_geo) | !is.na(proj_bat) | !is.na(proj_ev) | !is.na(proj_h)) %>%
  filter(greenproj_bin == 1) %>%
  pivot_longer(cols = c(proj_solar, proj_wind, proj_geo, proj_bat, proj_ev, proj_h), names_to = "proj", values_to = "proj_bin") %>%
  mutate(proj_bin = as.integer(proj_bin == "Yes")) %>%
  group_by(proj) %>%
  summarize(prop = weighted.mean(proj_bin, wt_green_type_trim))


# Placebo check----
est_model(varname = "d_battery_2y_q", outcome = "greenproj_bin", data.in = g)
est_model(varname = "d_battery_2y_q", outcome = "credit_biden_bin", data.in = g)
est_model(varname = "d_battery_2y_q", outcome = "greenbenefit_bin", data.in = g)

# Effect of Proximity on Visibility and Attribution----
# Treatment variables from configuration
var.list <- treat.vars
message("Estimating models for ", paste(names(treat.list), ":", var.list, collapse = ", "))

# Note: Using default behavior of create_proximity_table() which automatically
# filters the global coefmap to show only proximity coefficients that exist in models

# The main results table uses the global coefmap which now includes all proximity variables
# No need for dynamic_coefmap since the global coefmap is comprehensive

# Check for missing values in covar.list
message("Checking for missing values in covariates")
g %>%
  select(all_of(covar.list)) %>%
  summarise(across(everything(), ~ sum(is.na(.))))

# Estimate models for all main outcomes using new helper function
message("Estimating models for main outcomes")
all_models <- estimate_proximity_models(
  var.list = var.list,
  data.in = g,
  outcomes = c("greenproj_bin", "credit_biden_bin", "greenbenefit_bin")
)

# Extract models for backward compatibility
m.recog <- all_models$greenproj_bin
m.credit <- all_models$credit_biden_bin  
m.benefit <- all_models$greenbenefit_bin

#' Conduct equivalence test (TOST) for a coefficient
#' @param model Fitted model object
#' @param term_name Character string of the coefficient term to test
#' @param low Lower equivalence bound (default: -0.1)
#' @param high Upper equivalence bound (default: 0.1)
#' @param alpha Significance level (default: 0.05)
#' @return List with test results including estimate, standard error, p-values, equivalence conclusion, and CI
equivalence_test <- function(model, term_name, bounds = .1, alpha = 0.05) {
  
  # Extract coefficient and standard error
  model_tidy <- broom::tidy(model)
  coef_row <- model_tidy[model_tidy$term == term_name, ]
  
  if (nrow(coef_row) == 0) {
    stop("Term '", term_name, "' not found in model")
  }
  
  est <- coef_row$estimate[1]
  se <- coef_row$std.error[1]
  df <- df.residual(model)
  
  # TOST test statistics and p-values
  tL <- (est + bounds) / se
  pL <- 1 - pt(tL, df)
  
  tU <- (bounds - est) / se
  pU <- 1 - pt(tU, df)
  
  # Equivalence conclusion (both one-sided tests must be significant)
  equiv <- (pL < alpha) && (pU < alpha)
  
  # Confidence interval for the coefficient (TOST logic at alpha level)
  tcrit <- qt(1 - alpha, df)
  ci <- c(est - tcrit*se, est + tcrit*se)
  
  return(list(
    estimate = est,
    std.error = se,
    df = df,
    p_lower = pL,
    p_upper = pU,
    equivalent = equiv,
    confidence_interval = ci,
    bounds = c(-bounds, bounds),
    alpha = alpha,
    term = term_name
  ))
}

#' Find the smallest equivalence bound where equivalence still holds
#' @param model Fitted model object
#' @param term_name Character string of the coefficient term to test
#' @param outcome_name Character string describing the outcome (for filename)
#' @param treatment_name Character string describing the treatment (for filename)
#' @param alpha Significance level (default: 0.05)
#' @param start_bound Starting bound to search from (default: 0.2)
#' @param min_bound Minimum bound to consider (default: 0.001)
#' @param step_size Initial step size for search (default: 0.01)
#' @param tolerance Precision tolerance (default: 0.001)
#' @return List with minimum bound and test results
find_minimum_equivalence_bound <- function(model, term_name, outcome_name = "", treatment_name = "",
                                          alpha = 0.05, start_bound = 0.2, min_bound = 0.001, 
                                          step_size = 0.01, tolerance = 0.001) {
  
  # First check if equivalence holds at the starting bound
  test_result <- equivalence_test(model, term_name, bounds = start_bound, alpha = alpha)
  
  if (!test_result$equivalent) {
    return(list(
      minimum_bound = NA,
      equivalent_at_minimum = FALSE,
      message = paste("No equivalence found even at starting bound of", start_bound)
    ))
  }
  
  # Binary search approach
  upper_bound <- start_bound  # We know this works
  lower_bound <- min_bound    # Start from minimum
  
  best_bound <- upper_bound
  
  while ((upper_bound - lower_bound) > tolerance) {
    mid_bound <- (upper_bound + lower_bound) / 2
    
    test_result <- equivalence_test(model, term_name, bounds = mid_bound, alpha = alpha)
    
    if (test_result$equivalent) {
      # Equivalence holds, try smaller bound
      best_bound <- mid_bound
      upper_bound <- mid_bound
    } else {
      # Equivalence doesn't hold, need larger bound
      lower_bound <- mid_bound
    }
  }
  
  # Final test at the best bound found
  final_test <- equivalence_test(model, term_name, bounds = best_bound, alpha = alpha)
  
  # Write minimum bound to file in percentage format if treatment and outcome names provided
  if (outcome_name != "" && treatment_name != "") {
    bound_percentage <- round(best_bound * 100, 1)
    filename <- paste0("output/pnas/stats/min_equiv_bound_", outcome_name, "_", treatment_name, ".txt")
    writeLines(as.character(bound_percentage), filename)
  }
  
  return(list(
    minimum_bound = best_bound,
    equivalent_at_minimum = final_test$equivalent,
    final_test_results = final_test,
    message = paste("Minimum equivalence bound:", round(best_bound, 4))
  ))
}

# Equivalence tests for credit
equiv_test_credit_re_q1 <- equivalence_test(
  model = m.credit[[var.list[1]]], 
  term_name = paste0(var.list[1], "Q1")
)
equiv_test_credit_re_q1

# Find minimum equivalence bound
min_bound_credit_re_q1 <- find_minimum_equivalence_bound(
  model = m.credit[[var.list[1]]], 
  term_name = paste0(var.list[1], "Q1"),
  outcome_name = "credit_biden",
  treatment_name = "renewable_energy"
)
min_bound_credit_re_q1

min_bound_credit_mfg_q1 <- find_minimum_equivalence_bound(
  model = m.credit[[var.list[2]]], 
  term_name = paste0(var.list[2], "Q1"),
  outcome_name = "credit_biden",
  treatment_name = "manufacturing"
)
min_bound_credit_mfg_q1

# Extract treatment effect for Q1 and visibility model
treat_effect_q1 <- broom::tidy(m.recog[[var.list[1]]])[which(broom::tidy(m.recog[[var.list[1]]])$term == paste0(var.list[1], "Q1")), "estimate"][[1]]
writeLines(format(treat_effect_q1*100, digits = 2), paste0("output/pnas/stats/treat_effect_", var.list[1], "_q1_recog.txt"))

treat_effect2_q1 <- broom::tidy(m.recog[[var.list[2]]])[which(broom::tidy(m.recog[[var.list[2]]])$term == paste0(var.list[2], "Q1")), "estimate"][[1]]
writeLines(format(treat_effect2_q1*100, digits = 2), paste0("output/pnas/stats/treat_effect_", var.list[2], "_q1_recog.txt"))

# Check within-state variation for visibility outcome
check_within_state_variation("greenproj_bin", g, m.recog, var.list)

# Wald test
for(i in seq_along(var.list)) {
  var_name <- var.list[i]
  var_label <- ifelse(grepl("_re_", var_name), "renewable energy", "manufacturing")
  message("Wald test for visibility: ", var_label)
  print(wald(m.recog[[var_name]], keep = paste0(var_name, "Q[12]$")))
  writeLines(format(wald(m.recog[[var_name]], keep = paste0(var_name, "Q[12]$"))$p, digits = 2), paste0("output/pnas/stats/wald_test_recog_", var_label, "_q12.txt"))
  print(wald(m.recog[[var_name]], keep = paste0(var_name, "Q[34]$")))
  writeLines(format(wald(m.recog[[var_name]], keep = paste0(var_name, "Q[34]$"))$p, digits = 2), paste0("output/pnas/stats/wald_test_recog_", var_label, "_q34.txt"))
}

# Create Fig. 2, Panel A: effect of proximity on visibility
p.recognition <- create_proximity_plot(
  models = m.recog,
  title = "Visibility of local clean energy projects",
  y_label = "Effect on visibility probability",
  data.in = g,
  outcome = "greenproj_bin",
  legend.position = c(.15, .1)
)

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.recognition, here("output", "pnas", "figures", "fig_proximity_effect_recognition.pdf"))
}
save(p.recognition, file = here("data", "output", "fig_proximity_effect_recognition.RData"))

# Check within-state variation for credit attribution outcome  
check_within_state_variation("credit_biden_bin", g, m.credit, var.list)

# Create Fig. 2, Panel B: effect of proximity on credit attribution
p.credit <- create_proximity_plot(
  models = m.credit,
  title = "Traceability of clean energy projects to Biden Administration",
  y_label = "Effect on credit probability", 
  data.in = g,
  outcome = "credit_biden_bin",
  legend.position = "none"
)

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.credit, here("output", "pnas", "figures", "fig_proximity_effect_credit.pdf"))
}
save(p.credit, file = here("data", "output", "fig_proximity_effect_credit.RData"))

# Check within-state variation for perceived benefits outcome
check_within_state_variation("greenbenefit_bin", g, m.benefit, var.list)

# Create Fig 2, Panel C: effect of proximity on perceived benefits  
p.benefit <- create_proximity_plot(
  models = m.benefit,
  title = "Perceived benefits of local clean energy projects",
  y_label = "Effect on perceived benefits",
  data.in = g,
  outcome = "greenbenefit_bin", 
  legend.position = "none"
)

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.benefit, here("output", "pnas", "figures", "fig_proximity_effect_benefit.pdf"))
}
save(p.benefit, file = here("data", "output", "fig_proximity_effect_benefit.RData"))

# Main Results Regression Table----
m.table <- c(m.recog, m.credit, m.benefit)
# Use descriptive labels from configuration
table_labels <- rep(tools::toTitleCase(gsub("_", " ", treat.labels)), 3)
names(m.table) <- table_labels

vis.cap <- paste0("Linear probability models of proximity's effect on project visibility, credit attribution, and perceived benefits")

vis.notes <- paste0(
  "\\textit{Notes:} Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.table,
  title = vis.cap,
  notes = vis.notes,
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S6_proximity_effect.tex"),
  label = "tab:proximity",
  coef_map = coefmap,
  resize_width = .77
)

# Sensitivity analysis----
# Uses Conley SEs for auxiliary regressions to calculate partial R-squared.
# In anonymized mode, we use cached statistics from full mode run.

# Define benchmark variables for sensitivity analysis
# These should be strong predictors in your covariate list
benchmark_vars <- list(
  manufacturing = "force_ln_z",  # For manufacturing proximity models
  renewable = "income5Q4"        # For renewable energy proximity models  
)

# Cache file for sensitivity analysis statistics
sensitivity_cache_file <- here("data", "cache", "sensitivity_stats_cache.rds")

# Function to get sensitivity statistics (from cache or fresh computation)
get_sensitivity_stats <- function(treatment_var, data, benchmark_var, covar_list) {
  
  cache_key <- paste0("sensitivity_", treatment_var, "_", benchmark_var)
  
  if (REPLICATION_MODE == "anonymized") {
    # Load from cache
    if (!file.exists(sensitivity_cache_file)) {
      stop("Anonymized mode requires cached sensitivity statistics. File not found: ", sensitivity_cache_file)
    }
    cache <- readRDS(sensitivity_cache_file)
    if (!cache_key %in% names(cache)) {
      stop("Sensitivity statistics for '", treatment_var, "' not found in cache")
    }
    return(cache[[cache_key]])
  }
  
  # Full mode: compute fresh with Conley SEs
  # Regress first level of the treatment on the covariates
  treat_dist <- feols(
    as.formula(paste0("I(", treatment_var, " == 'Q1') ~ ", paste(covar_list, collapse = "+"), "| state")),
    data = subset(data, data[[treatment_var]] %in% c("Q1", "Q5")),
    vcov = vcov_conley(cutoff = 50, lat = "lat_zip", lon = "lon_zip")
  )
  
  # Extract statistics for treatment model
  dof.x <- degrees_freedom(treat_dist, "resid")
  d.stat <- broom::tidy(treat_dist)[which(broom::tidy(treat_dist)$term == benchmark_var), "statistic"][[1]]
  
  # Regress outcome on covariates
  outcome_model <- feols(
    as.formula(paste0("greenproj_bin ~ ", paste(covar_list, collapse = "+"), "| state")),
    data = subset(data, data[[treatment_var]] %in% c("Q1", "Q5")),
    vcov = vcov_conley(cutoff = 50, lat = "lat_zip", lon = "lon_zip")
  )
  
  # Extract statistics for outcome model
  dof.y <- degrees_freedom(outcome_model, "resid")
  y.stat <- broom::tidy(outcome_model)[which(broom::tidy(outcome_model)$term == benchmark_var), "statistic"][[1]]
  
  # Return statistics
  list(dof.x = dof.x, d.stat = d.stat, dof.y = dof.y, y.stat = y.stat)
}

# Function to run sensitivity analysis for a given treatment variable
run_sensitivity_analysis <- function(treatment_var, models, data, benchmark_var, covar_list, table_no) {
  
  # Determine variable type and labels
  var_type <- ifelse(grepl("_re_", treatment_var), "renewable", "manufacturing")
  var_label <- ifelse(var_type == "renewable", "Renewable energy proximity (Q1)", "Manufacturing proximity (Q1)")
  
  message("Running sensitivity analysis for: ", treatment_var)
  
  # Get statistics (from cache or fresh computation)
  stats <- get_sensitivity_stats(treatment_var, data, benchmark_var, covar_list)
  
  # Calculate partial R-squared
  r2dxj.x <- partial_r2(t_statistic = stats$d.stat, dof = stats$dof.x)
  r2yxj.dx <- partial_r2(t_statistic = stats$y.stat, dof = stats$dof.y)
  
  # Extract observed estimate
  estimate_in <- models[[treatment_var]] %>%
    broom::tidy() %>%
    dplyr::filter(term == paste0(treatment_var, "Q1"))
  
  # Run sensemakr analysis
  sense_out <- sensemakr(
    estimate = estimate_in$estimate,
    se = estimate_in$std.error,
    r2dxj.x = r2dxj.x,
    r2yxj.dx = r2yxj.dx,
    dof = degrees_freedom(models[[treatment_var]], "resid"),
    kd = 1
  )
  
  # Add bound labels based on the benchmark variable
  benchmark_label <- if (benchmark_var %in% names(coefmap)) {
    coefmap[[benchmark_var]]
  } else {
    benchmark_var  # Fallback to variable name if not in coefmap
  }
  sense_out$bounds$bound_label <- paste0("1x ", benchmark_label)
  
  # Sensitivity figures are diagnostic only (tables shown in paper)
  if (isFALSE(pnas)) {
    pdf(here("output", "pnas", "figures", paste0("fig_proximity_effect_", var_type, "_sensitivity.pdf")))
    plot(sense_out)
    dev.off()
  }

  # Generate sensitivity table
  file_suffix <- ifelse(var_type == "renewable", "re", "mfg")
  generate_sensitivity_table(
    sense_model = sense_out,
    outcome_label = "Visibility (=1)",
    treatment_label = var_label,
    file_name = here("output", "pnas", "tables", paste0("tab_", table_no, "_proximity_effect_", file_suffix, "_sensitivity.tex")),
    caption = paste("Sensitivity analysis for visibility outcome,", tolower(var_label), "model"),
    label = paste0("proximity_effect_", file_suffix, "_sensitivity"),
    resize_width = 1,
    digits = 3,
    verbose = FALSE
  )
  
  return(sense_out)
}

# Run sensitivity analysis for each treatment variable
# In full mode: computes fresh and caches statistics
# In anonymized mode: loads cached statistics
sensitivity_results <- list()
sensitivity_cache <- list()

for(i in seq_along(var.list)) {
  treatment_var <- var.list[i]
  var_type <- ifelse(grepl("_re_", treatment_var), "renewable", "manufacturing")
  benchmark_var <- benchmark_vars[[var_type]]
  
  # In full mode, cache the statistics for anonymized replication
  if (REPLICATION_MODE == "full") {
    cache_key <- paste0("sensitivity_", treatment_var, "_", benchmark_var)
    sensitivity_cache[[cache_key]] <- get_sensitivity_stats(treatment_var, g, benchmark_var, covar.list)
  }
  
  sensitivity_results[[treatment_var]] <- run_sensitivity_analysis(
    treatment_var = treatment_var,
    models = m.recog,
    data = g,
    benchmark_var = benchmark_var,
    covar_list = covar.list,
    table_no = paste0("S", i + 6)
  )
}

# Save sensitivity cache in full mode
if (REPLICATION_MODE == "full" && length(sensitivity_cache) > 0) {
  saveRDS(sensitivity_cache, sensitivity_cache_file)
  message("Cached sensitivity statistics for anonymized replication: ", sensitivity_cache_file)
}

# Robustness Tests----
robustness_specs <- list(
  "geo_subset" = list(subset_condition = "loc_anom == 0"),
  "100km" = list(cutoff = 100),
  "200km" = list(cutoff = 200),
  "spherical" = list(conley.distance = "spherical"),
  "state_se" = list(se = "state"),
  "dma" = list(fe = "| dma")
)

## 1 year temporal window----
# Robustness test using 1-year temporal window instead of 2-year window

message("=== Robustness Test: 1-Year Temporal Window ===")

# Treatment variables for 1-year temporal window
var.list.1y <- c("d_re_q", "d_mfg_open_q")
message("Estimating models for 1-year window: ", paste(var.list.1y, collapse = ", "))

# Estimate models for all main outcomes using 1-year temporal window
message("Estimating models for main outcomes (1-year window)")
all_models_1y <- estimate_proximity_models(
  var.list = var.list.1y,
  data.in = g,
  outcomes = c("greenproj_bin", "credit_biden_bin", "greenbenefit_bin")
)

# Extract models for backward compatibility
m.recog.1y <- all_models_1y$greenproj_bin
m.credit.1y <- all_models_1y$credit_biden_bin
m.benefit.1y <- all_models_1y$greenbenefit_bin

# Check within-state variation for visibility outcome (1-year window)
check_within_state_variation("greenproj_bin", g, m.recog.1y, var.list.1y)

# Wald test for 1-year window
for(i in seq_along(var.list.1y)) {
  var_name <- var.list.1y[i]
  var_label <- ifelse(grepl("_re_", var_name), "renewable energy", "manufacturing")
  message("Wald test for visibility (1-year window): ", var_label)
  print(wald(m.recog.1y[[var_name]], keep = paste0(var_name, "Q[12]$")))
  print(wald(m.recog.1y[[var_name]], keep = paste0(var_name, "Q[34]$")))
}

# Combine models for comparison table
m.table.1y <- c(m.recog.1y, m.credit.1y, m.benefit.1y)
names(m.table.1y) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

vis.cap.1y <- paste0("Robustness to 1-year temporal window: Linear probability models of project visibility, credit attribution, and perceived benefits")

vis.notes.1y <- paste0(
  "\\textit{Notes:} ",
  "This analysis uses projects that opened within 1 year of survey completion instead of the main 2-year window. ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.table.1y,
  title = vis.cap.1y,
  notes = vis.notes.1y,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S18_proximity_effect_1year.tex"),
  label = "tab:proximity_effect_1year",
  resize_width = 1
)


## Geographic identifiers----

robustness_results <- run_robustness_analysis(
  var.list = var.list,
  data.in = g,
  outcomes = c("greenproj_bin", "credit_biden_bin", "greenbenefit_bin"),
  specs = robustness_specs
)

# Extract geographic subset results
m.geo <- c(
  robustness_results$geo_subset$greenproj_bin,
  robustness_results$geo_subset$credit_biden_bin,
  robustness_results$geo_subset$greenbenefit_bin
)
names(m.geo) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

vis.geo.cap <- paste0(
  "Robustness to alternative geocoordinates: ",
  "Linear probability models of project visibility and credit attribution."
  )

vis.geo.notes <- paste0(
  "\\textit{Notes:} ",
  "This analysis is performed on the subset of respondents whose IP addresses and ZIP codes imply similar longitude-latitude geo-coordinates. ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.geo,
  title = vis.geo.cap,
  notes = vis.geo.notes,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S12_proximity_effect_geo.tex"),
  label = "tab:proximity_effect_geo",
  resize_width = 1
)

## Alternative Conley standard error cutoffs----

# Extract 100 km results
m.100 <- c(
  robustness_results$`100km`$greenproj_bin,
  robustness_results$`100km`$credit_biden_bin,
  robustness_results$`100km`$greenbenefit_bin
)
names(m.100) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.100 <- paste0(
  "Robustness to 100 km Conley standard error cutoff: ",
  "Linear probability models of project visibility and credit attribution."
)
notes.100 <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (100 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.100,
  title = cap.100,
  notes = notes.100,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S9_proximity_effect_100.tex"),
  label = "tab:proximity_effect_100",
  resize_width = 1
)

## 200 km cutoff
m.200 <- c(
  robustness_results$`200km`$greenproj_bin,
  robustness_results$`200km`$credit_biden_bin,
  robustness_results$`200km`$greenbenefit_bin
)
names(m.200) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.200 <- paste0(
  "Robustness to 200 km Conley standard error cutoff: ",
  "Linear probability models of project visibility and credit attribution."
)
notes.200 <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (200 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.200,
  title = cap.200,
  notes = notes.200,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S10_proximity_effect_200.tex"),
  label = "tab:proximity_effect_200",
  resize_width = 1
)

## Different distance metrics
# Extract spherical distance results
m.sph <- c(
  robustness_results$spherical$greenproj_bin,
  robustness_results$spherical$credit_biden_bin,
  robustness_results$spherical$greenbenefit_bin
)
names(m.sph) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.sph <- paste0(
  "Robustness to spherical distance metric: ",
  "Linear probability models of project visibility and credit attribution."
)
notes.sph <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (50 km threshold, spherical distance). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.sph,
  title = cap.sph,
  notes = notes.sph,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S13_proximity_effect_sph.tex"),
  label = "tab:proximity_effect_sph",
  resize_width = 1
)

## State-clustered standard errors----
# Extract state-clustered SE results
m.state <- c(
  robustness_results$state_se$greenproj_bin,
  robustness_results$state_se$credit_biden_bin,
  robustness_results$state_se$greenbenefit_bin
)
names(m.state) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.state <- paste0(  
  "Robustness to state-clustered standard errors: ",
  "Linear probability models of project visibility and credit attribution."
)
notes.state <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with cluster-robust standard errors by state. ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.state,
  title = cap.state,
  notes = notes.state,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S11_proximity_effect_state.tex"),
  label = "tab:proximity_effect_state",
  resize_width = 1
)


## DMA fixed effects----

m.dma <- c(
  robustness_results$dma$greenproj_bin,
  robustness_results$dma$credit_biden_bin,
  robustness_results$dma$greenbenefit_bin
)
names(m.dma) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.dma <- paste0(
  "Robustness to DMA fixed effects: ",
  "Linear probability models of project visibility and credit attribution."
)
notes.dma <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.dma,
  title = cap.dma,
  notes = notes.dma,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S16_proximity_effect_dma.tex"),
  label = "tab:proximity_effect_dma",
  resize_width = 1,
  fixed_effects_label = "DMA Fixed Effects"
)

## Continuous distance specification----
# Create continuous distance variable names from quintile variable names
continuous_vars <- gsub("_q$", "", var.list)  # Remove "_q" suffix to get base distance variables

# Visibility models with continuous distance
m.recog.log <- lapply(continuous_vars, function(x) est_model(varname = paste0("log(", x, ")"), data.in = g))
names(m.recog.log) <- continuous_vars

# Credit models with continuous distance  
m.credit.log <- lapply(continuous_vars, function(x) est_model(varname = paste0("log(", x, ")"), outcome = "credit_biden_bin", data.in = g))
names(m.credit.log) <- continuous_vars

# Benefit models with continuous distance
m.benefit.log <- lapply(continuous_vars, function(x) est_model(varname = paste0("log(", x, ")"), outcome = "greenbenefit_bin", data.in = g))
names(m.benefit.log) <- continuous_vars

# Combine models for table - use dynamic naming
m.log <- c(m.recog.log, m.credit.log, m.benefit.log)
names(m.log) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.log <- paste0(
  "Robustness to continuous distance specification: ",
  "Linear probability models of project visibility, credit attribution, and perceived benefits."
)
notes.log <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Distance is log-transformed (continuous). ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.log,
  title = cap.log,
  notes = notes.log,
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S14_proximity_effect_continuous.tex"),
  label = "tab:proximity_effect_continuous",
  coef_map = c(
    "log(d_re_2y)" = "Distance to renewables (log)",
    "log(d_mfg_open_2y)" = "Distance to manufacturing (log)"
  ),
  resize_width = 1
)

## Survey weights----
m.recog.wt <- lapply(var.list, est_model, weights = g$wt_all_trim, data.in = g)
names(m.recog.wt) <- var.list

m.credit.wt <- lapply(var.list, est_model, outcome = "credit_biden_bin", weights = g$wt_credit_trim, data.in = g)
names(m.credit.wt) <- var.list

m.benefit.wt <- lapply(var.list, est_model, outcome = "greenbenefit_bin", weights = g$wt_benefit_trim, data.in = g)
names(m.benefit.wt) <- var.list

m.wt <- c(m.recog.wt, m.credit.wt, m.benefit.wt)
names(m.wt) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.wt <- paste0(
  "Robustness to survey weights: ",
  "Linear probability models of project visibility, credit attribution, and perceived benefits."
)

notes.wt <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are weighted OLS (ACS-raked weights) with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.wt,
  title = cap.wt,
  notes = notes.wt,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Visibility", "Credit Biden", "Benefits"),
  filename = here("output", "pnas", "tables", "tab_S15_proximity_effect_weights.tex"),
  label = "tab:proximity_effect_weights",
  resize_width = 1
)

## Different model specifications----

message("=== Robustness Test: Alternative Covariate Specifications ===")

# Define covariate specifications
message("Defining covariate specifications...")

# Specification 1: Only state fixed effects (minimal controls)
covar.state.only <- "1"

# Specification 2: State FE + individual-level characteristics
covar.individual <- covar.list[1:11]  # Assuming first 11 are individual characteristics

# Specification 3: State FE + contextual/economic factors
covar.contextual <- covar.list[21:24]  # Assuming these are economic/contextual characteristics

## State fixed effects only----
message("=== Specification 1: State Fixed Effects Only ===")

# Visibility models (use unique model_name to avoid cache collision)
m.recog.state <- lapply(var.list, function(x) est_model(varname = x, data.in = g, covar.list = covar.state.only, 
                                                         model_name = paste0("greenproj_bin_", x, "_conley_50_state_only")))
names(m.recog.state) <- var.list

# Credit attribution models
m.credit.state <- lapply(var.list, function(x) est_model(varname = x, outcome = "credit_biden_bin", data.in = g, covar.list = covar.state.only,
                                                          model_name = paste0("credit_biden_bin_", x, "_conley_50_state_only")))
names(m.credit.state) <- var.list

# Perceived benefits models
m.benefit.state <- lapply(var.list, function(x) est_model(varname = x, outcome = "greenbenefit_bin", data.in = g, covar.list = covar.state.only,
                                                           model_name = paste0("greenbenefit_bin_", x, "_conley_50_state_only")))
names(m.benefit.state) <- var.list

# Combine for table
m.table.state <- c(m.recog.state, m.credit.state, m.benefit.state)
names(m.table.state) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

vis.cap.state <- paste0("Robustness to minimal controls: State fixed effects only")

vis.notes.state <- paste0(
  "\\textit{Notes:} ",
  "This specification includes only state fixed effects and survey wave controls, without individual or contextual covariates. ",
  "Each column reports a separate linear probability model. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley SEs (50 km threshold). ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

# Diagnostic table (not in paper)
if (isFALSE(pnas)) {
  create_proximity_table(
    models = m.table.state,
    title = vis.cap.state,
    notes = vis.notes.state,
    column_labels = c("Visibility", "Credit Biden", "Benefits"),
    filename = here("output", "pnas", "tables", "tab_proximity_effect_state_only.tex"),
    label = "tab:proximity_effect_state_only",
    coef_map = coefmap[grepl("^d_", names(coefmap))],
    resize_width = .65
  )
}

## State FE + individual characteristics----
message("=== Specification 2: State FE + Individual Characteristics ===")

# Visibility models (use unique model_name to avoid cache collision)
m.recog.individual <- lapply(var.list, function(x) est_model(varname = x, data.in = g, covar.list = covar.individual,
                                                              model_name = paste0("greenproj_bin_", x, "_conley_50_individual")))
names(m.recog.individual) <- var.list

# Credit attribution models
m.credit.individual <- lapply(var.list, function(x) est_model(varname = x, outcome = "credit_biden_bin", data.in = g, covar.list = covar.individual,
                                                               model_name = paste0("credit_biden_bin_", x, "_conley_50_individual")))
names(m.credit.individual) <- var.list

# Benefit models
m.benefit.individual <- lapply(var.list, function(x) est_model(varname = x, outcome = "greenbenefit_bin", data.in = g, covar.list = covar.individual,
                                                                model_name = paste0("greenbenefit_bin_", x, "_conley_50_individual")))
names(m.benefit.individual) <- var.list

# Combine for table
m.table.individual <- c(m.recog.individual, m.credit.individual, m.benefit.individual)
names(m.table.individual) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

vis.cap.individual <- paste0("Robustness to individual controls only: State FE + individual characteristics")

vis.notes.individual <- paste0(
  "\\textit{Notes:} ",
  "This specification includes state fixed effects, survey wave controls, and individual-level characteristics, but excludes contextual covariates. ",
  "Each column reports a separate linear probability model. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley SEs (50 km threshold). ",
  "Individual covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

# Diagnostic table (not in paper)
if (isFALSE(pnas)) {
  create_proximity_table(
    models = m.table.individual,
    title = vis.cap.individual,
    notes = vis.notes.individual,
    coef_map = coefmap[grepl("^d_", names(coefmap))],
    column_labels = c("Visibility", "Credit Biden", "Benefits"),
    filename = here("output", "pnas", "tables", "tab_proximity_effect_individual_only.tex"),
    label = "tab:proximity_effect_individual_only",
    resize_width = .65
  )
}

## State FE + contextual factors----
message("=== Specification 3: State FE + Contextual Factors ===")

# Visibility models (use unique model_name to avoid cache collision)
m.recog.contextual <- lapply(var.list, function(x) est_model(varname = x, data.in = g, covar.list = covar.contextual,
                                                              model_name = paste0("greenproj_bin_", x, "_conley_50_contextual")))
names(m.recog.contextual) <- var.list

# Credit models
m.credit.contextual <- lapply(var.list, function(x) est_model(varname = x, outcome = "credit_biden_bin", data.in = g, covar.list = covar.contextual,
                                                               model_name = paste0("credit_biden_bin_", x, "_conley_50_contextual")))
names(m.credit.contextual) <- var.list

# Benefit models
m.benefit.contextual <- lapply(var.list, function(x) est_model(varname = x, outcome = "greenbenefit_bin", data.in = g, covar.list = covar.contextual,
                                                                model_name = paste0("greenbenefit_bin_", x, "_conley_50_contextual")))
names(m.benefit.contextual) <- var.list

# Combine for table
m.table.contextual <- c(m.recog.contextual, m.credit.contextual, m.benefit.contextual)
names(m.table.contextual) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

vis.cap.contextual <- paste0("Robustness to contextual controls only: State FE + contextual factors")

vis.notes.contextual <- paste0(
  "\\textit{Notes:} ",
  "This specification includes state fixed effects, survey wave controls, and contextual/economic characteristics, but excludes individual covariates. ",
  "Each column reports a separate linear probability model. ",
  "Models 1–2: outcome = 1 if the respondent reports a local green project, 0 otherwise. ",
  "Models 3–4: outcome = 1 if the respondent credits the Biden Administration for local green investments. ",
  "Models 5–6: outcome = 1 if the respondent perceives a benefit from local green projects. ",
  "Estimates are OLS with Conley SEs (50 km threshold). ",
  "Contextual covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

# Diagnostic table (not in paper)
if (isFALSE(pnas)) {
  create_proximity_table(
    models = m.table.contextual,
    title = vis.cap.contextual,
    notes = vis.notes.contextual,
    column_labels = c("Visibility", "Credit Biden", "Benefits"),
    filename = here("output", "pnas", "tables", "tab_proximity_effect_contextual_only.tex"),
    label = "tab:proximity_effect_contextual_only",
    coef_map = coefmap[grepl("^d_", names(coefmap))],
    resize_width = .65
  )
}

# Proximity effect on other credit claiming outcomes----

# Governor credit
summary(g$credit_gov_bin)
m.gov <- lapply(var.list, est_model, outcome = "credit_gov_bin", data.in = g)
names(m.gov) <- var.list

# State lawmakers credit
summary(g$credit_state_bin)
m.state <- lapply(var.list, est_model, outcome = "credit_state_bin", data.in = g)
names(m.state) <- var.list

# Congressional credit
summary(g$credit_cong_bin)
m.cong <- lapply(var.list, est_model, outcome = "credit_cong_bin", data.in = g)
names(m.cong) <- var.list

# Community credit
summary(g$credit_com_bin)
m.com <- lapply(var.list, est_model, outcome = "credit_com_bin", data.in = g)
names(m.com) <- var.list

# Market credit
summary(g$credit_market_bin)
m.market <- lapply(var.list, est_model, outcome = "credit_market_bin", data.in = g)
names(m.market) <- var.list

# Combine all models
m.all <- c(m.gov, m.state, m.cong, m.com, m.market)
names(m.all) <- c("Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing", "Renewables", "Manufacturing")

cap.all <- paste0(
  "Robustness to different credit attribution outcomes: ",
  "Linear probability models of credit attribution."
)

notes.all <- paste0(
  "\\textit{Notes:} ",
  "Each column reports a separate linear probability model. ",
  "Unit of analysis is the individual survey respondent. ",
  "Outcome = 1 if the respondent credits the column header actor for local green investments. ",
  "Estimates are OLS with Conley standard errors (50 km threshold). ",
  "Continuous covariates are standardized. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

create_proximity_table(
  models = m.all,
  title = cap.all,
  notes = notes.all,
  coef_map = coefmap[grepl("^d_", names(coefmap))],
  column_labels = c("Governor", "State lawmakers", "Congress", "Local officials", "Markets"),
  filename = here("output", "pnas", "tables", "tab_S17_proximity_effect_all_credit.tex"),
  label = "tab:proximity_effect_all_credit",
  resize_width = 1,
  resize_direction = "both"
)

# Save vcov cache at end of script (only in full mode)
if (REPLICATION_MODE == "full") {
  save_vcov_cache()
  message("Vcov matrices cached for future anonymized replication")
}